package com.darrenchan.spark.rdd

import org.apache.spark.{SparkConf, SparkContext}

/**
  * 统计log中用户ID次数最多的Top3
  * 输出uid和次数
  */
/**
  * Counts occurrences of user IDs in a log file and prints the three most
  * frequent ones as (uid, count), most frequent first.
  *
  * The user ID is assumed to be the second whitespace-separated field of
  * each log line — TODO confirm against the actual log format.
  *
  * Usage: TopN [inputPath]   (defaults to "log.txt" for backward compatibility)
  */
object TopN {
  def main(args: Array[String]): Unit = {
    // Input path is configurable via the first CLI argument; the original
    // hard-coded file remains the default so existing invocations still work.
    val inputPath = if (args.nonEmpty) args(0) else "log.txt"

    // NOTE(review): setMaster("local[2]") hard-codes local mode; in a real
    // deployment this should be supplied by spark-submit instead.
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TopN")
    val sc = new SparkContext(sparkConf)

    try {
      val top3 = sc.textFile(inputPath)
        .map(_.split(" "))
        // Skip malformed lines with fewer than two fields instead of
        // throwing ArrayIndexOutOfBoundsException mid-job.
        .collect { case fields if fields.length > 1 => (fields(1), 1) }
        .reduceByKey(_ + _)
        // top(k) keeps only k candidates per partition and merges them on
        // the driver — avoids the full shuffle-sort that the previous
        // swap → sortByKey(false) → swap → take(3) pipeline performed.
        // Result is already ordered by descending count.
        .top(3)(Ordering.by[(String, Int), Int](_._2))

      top3.foreach(println)
    } finally {
      // Release cluster resources even if the job above fails.
      sc.stop()
    }
  }
}
